
******************************************************
* 				Replication do-file                  *
*         Review of Economics and Statistics         *
*                                                    *
*               Choosing Your Pond                   *
*       Location Choices and Relative Income         *
*                                                    *
*		Nicolas Bottan & Ricardo Perez-Truglia       *
*                                                    *
*                   September 2020                   *
*                                                    *
******************************************************
/*

                    Notes
					
This do-file replicates Tables 1-3 from the paper. 

The analysis makes use of survey data on medical students 
participating in the NRMP, and as such is highly sensitive
and cannot be posted.

Please contact the authors if you are interested in having
them conduct any additional analysis.

*** Install Stata commands
cap: ssc install renames
cap: ssc install ivreg2
cap: ssc install ranktest 
cap: ssc install estout
cap: ssc install outreg2
cap: ssc install parmest
cap: ssc install qqvalue
cap: ssc install quantiles
*/


*** Set WD
cd ""		// Set path for main folder here

* Make sure the output folder used by the esttab exports exists.
* The error raised when the folder is already there is deliberately ignored.
cap: mkdir "results"

use "Main Experiment/mainexperiment_confidential.dta", clear

*** Table 1: Location Preferences: Baseline Estimates
* Column (1) is the pooled probit; columns (2)-(7) are subsample probits by
* relationship status, gender and specialty salary. For every pair of
* subsamples a fully interacted probit is run on the pooled sample and the
* two-sided normal p-values of the interaction terms are reported as
* e(p1) (interaction with post_relinc) and e(p2) (interaction with post_relpx).
eststo clear

global controls "rel_res rel_qol lnpop reldens relblack reldemocrat relurban"

probit baseline_finalrank post_relinc post_relpx lndiffwage $controls, robust
estimates store ols1

** Heterogeneity
* Specialty wage indicator: 1 if perminc is above 229,000, missing if perminc is missing
cap: gen highwage=perminc>229000 if perminc!=.

* Each splitting variable fills two columns: group==0 first, then group==1
*   single   -> ols2 (Non-Single) , ols3 (Single)
*   male     -> ols4 (Female)     , ols5 (Male)
*   highwage -> ols6 (at or below 229,000) , ols7 (above 229,000)
local col = 1
foreach grp in single male highwage {

	local col0 = `col' + 1
	local col1 = `col' + 2

	probit baseline_finalrank post_relinc post_relpx lndiffwage $controls if `grp'==0, robust
	estimates store ols`col0'

	probit baseline_finalrank post_relinc post_relpx lndiffwage $controls if `grp'==1, robust
	estimates store ols`col1'

	* Fully interacted specification on the pooled sample
	gen int_px=`grp'*post_relpx
	gen int_inc=`grp'*post_relinc
	gen interacted=`grp'

	global intcontrols ""
	foreach v in $controls {
		global intcontrols "${intcontrols} c.`v'#c.`grp'"
	}

	probit baseline_finalrank post_relpx post_relinc int_px int_inc interacted c.lndiffwage#c.interacted lndiffwage $controls $intcontrols , robust

	* The p-values must be computed while the interacted model is active, but
	* they have to be attached to the STORED subsample models: the interacted
	* model itself is never stored, so a plain estadd here would be lost and
	* the p1 and p2 rows of the exported table would be empty.
	local pval_inc = string(normal(-abs(_b[int_inc]/_se[int_inc]))*2, "%9.3f")
	local pval_px  = string(normal(-abs(_b[int_px]/_se[int_px]))*2, "%9.3f")

	* NOTE(review): the p-value is shown under both columns of the pair it
	* compares; restrict the name list below if it should appear only once.
	estadd local p1 `pval_inc' : ols`col0' ols`col1'
	estadd local p2 `pval_px' : ols`col0' ols`col1'

	drop int_px int_inc interacted

	local col = `col1'
}

* Table header rows. The backslash in front of a dollar sign only stops Stata
* from treating it as a global macro; LaTeX receives a plain dollar sign.
* NOTE(review): the dollar signs in front of 229,000 therefore reach LaTeX
* unescaped (as math-mode toggles) - confirm the header compiles as intended.
local t1 "  & & \multicolumn{2}{c}{By Relationship Status} & \multicolumn{2}{c}{By Gender} & \multicolumn{2}{c}{By Specialty Salary} \\ \cmidrule(lr){3-4}\cmidrule(lr){5-6}\cmidrule(lr){7-8}"
local t2 " & All & Non-Single & Single & Female & Male & \$\leq\$ \$229,000  & \$ > \$ \$229,000  \\ "
local t3 " &(1)&(2)&(3)&(4)&(5)&(6)&(7) \\ \midrule"

esttab ols1 ols2 ols3 ols4 ols5 ols6 ols7 using "results/Table1.tex", ///
	se nonotes noconstant label replace booktabs gap compress ///
	keep(post_relinc post_relpx) order(post_relinc post_relpx) ///
	b(%9.3f) se(%9.3f) star(* 0.1 ** 0.05 *** 0.01) ///
	stats(p1 p2 r2_p N, fmt(%9.3f %9.3f %9.3f %9.0fc) labels("Relative" "Absolute" "Pseudo \$R^2\$" "Observations")) ///
	nonumbers posthead("`t1'" "`t2'" "`t3'") mlabels(none) eqlabels(none) ///
	coeflabels(post_relinc "$\beta^{ER}$" post_relpx "$\beta^{COL}$")










*** Table 2: Location Preferences: Robustness to Alternative Control Variables
eststo clear

global controls "rel_res rel_qol lnpop reldens relblack reldemocrat relurban"

* Define control groups

* Step 1: build a lookup file with one row per distinct state (pooled over
* state1 and state2) and one indicator variable d_st# per state.
preserve
keep id state1 state2
reshape long state, i(id) j(j)
drop id j
bys state: keep if _n==1
tab state, gen(d_st)
* Number of indicators created by tabulate; used below instead of a
* hard-coded count so the code keeps working if the set of states changes.
local nstates = r(r)
tempfile sdummy
save "`sdummy'", replace
restore

* Step 2: attach the indicators once for state1 and once for state2,
* suffixing them _1 and _2 respectively.
forvalues s=1/2 {
	gen state=state`s'
	merge m:1 state using "`sdummy'"
	drop if _mer==2
	forvalues i=1/`nstates' {
		rename d_st`i' d_st`i'_`s'
	}
	drop _mer state
}

* Step 3: relative state dummies = indicator for state1 minus indicator for state2
forvalues i=1/`nstates' {
	gen st_rel`i' = d_st`i'_1 - d_st`i'_2
}

* Alternative control sets, one per row of Table 2
local demographic "reldateage relurban relgender relhed relforeign reldens relhispanic relblack lnpop"
local amenities "rel_qol pubgood_total pubgood_educ pubgood_health lnrelcrime lnrelviolcrime reldemoc"
local geography "rel_distu"
local economic "lntax relsaletax relrent relgini"
local statedummy "st_rel*"
local objprogrchar "rel_res proguniA proguniB"
local subjprogrchar "po_purpose po_prestige po_prospects"

eststo clear

* Sample restriction used in each pass: 1 = everyone, 2 = single==0, 3 = single==1
local sample1 ""
local sample2 "if single==0"
local sample3 "if single==1"

* Control set entering each of the ten specifications (rows of the table)
local spec1  ""
local spec2  "$controls"
local spec3  "`demographic'"
local spec4  "`amenities'"
local spec5  "`geography'"
local spec6  "`economic'"
local spec7  "`statedummy'"
local spec8  "`objprogrchar'"
local spec9  "`subjprogrchar'"
local spec10 "`demographic' `amenities' `geography' `economic' `statedummy' `objprogrchar' `subjprogrchar'"

forvalues k=1/3 {

	global ifcond "`sample`k''"

	* Estimate the ten probits and store them as row1 ... row10
	forvalues r=1/10 {
		probit baseline_finalrank post_relinc post_relpx lndiffwage `spec`r'' $ifcond, robust
		eststo row`r'
	}

	* Collect coefficients (three columns per model in r(coefs)) and the
	* pseudo R-squared of every specification from esttab's returned matrices
	cap: eststo drop post_relinc post_relpx r2_p
	esttab row*, keep(post_relinc post_relpx) order(post_relinc post_relpx) se stat(r2_p)

	matrix list r(coefs)
	matrix define A=r(coefs)
	matrix list r(stats)
	matrix define R=r(stats)

	eststo drop row*
	local rnames : rownames A
	local models : coleq A
	local models : list uniq models

	* Transpose the results: for each coefficient of interest build a
	* pseudo-model whose "coefficients" are named after the specifications
	local rowidx = 0
	foreach coef in post_relinc post_relpx {
		local ++rowidx
		local m = 0
		cap: matrix drop b
		cap: matrix drop se
		cap: matrix drop p
		foreach mdl of local models {
			local ++m
			* columns of A holding the 1st, 2nd and 3rd entry of model m
			local c1 = 3*`m' - 2
			local c2 = 3*`m' - 1
			local c3 = 3*`m'
			matrix tmp = A[`rowidx', `c1']
			if tmp[1,1]<. {
				matrix colnames tmp = `mdl'
				matrix b = nullmat(b), tmp
				matrix tmp[1,1] = A[`rowidx', `c2']
				matrix se = nullmat(se), tmp
				matrix tmp[1,1] = A[`rowidx', `c3']
				matrix p = nullmat(p), tmp
			}
		}
		ereturn post b
		quietly estadd matrix se
		quietly estadd matrix p
		eststo `coef'`k'
	}

	cap: matrix drop b
	cap: matrix drop se
	cap: matrix drop p

	* Post the row of pseudo R-squared values as its own pseudo-model
	ereturn post R
	eststo PsR`k'

	esttab , se mtitle noobs star(* .1 ** .05 *** .01)

	cap: matrix drop A
	cap: matrix drop R

}

* Header rows of Table 2: panel titles, sample labels, column numbers
local hdr_panels "& \multicolumn{3}{c}{Panel A: \$ \beta^{ER} \$} & \multicolumn{3}{c}{Panel B: \$ \beta^{COL} \$} & \multicolumn{3}{c}{Pseudo \$ R^2 \$} \\ \cmidrule(lr){2-4}\cmidrule(lr){5-7}\cmidrule(lr){8-10}"
local hdr_samples " & Non-Single & Single & All & Non-Single & Single & All & Non-Single & Single & All  \\"
local hdr_numbers " &(1)&(2)&(3)&(4)&(5)&(6)&(7)&(8)&(9)\\ \midrule"

* Columns are ordered Non-Single (2), Single (3), All (1) within each panel
esttab post_relinc2 post_relinc3 post_relinc1 post_relpx2 post_relpx3 post_relpx1  PsR2 PsR3 PsR1 ///
	using "results/Table2.tex", ///
	se nonotes noconstant label replace booktabs gap compress stats() ///
	b(%9.3f) se(%9.3f) star(* 0.1 ** 0.05 *** 0.01) ///
	nonumbers mlabels(none) eqlabels(none) ///
	coeflabels(row1 "No Controls" row2 "Baseline" row3 "Demographic" row4 "Amenities" row5 "Geography" row6 "Economic" row7 "State Dummies" row8 "Obj. Program Chars." row9 "Subj. Program Chars." row10 "All Controls") ///
	posthead("`hdr_panels'" "`hdr_samples'" "`hdr_numbers'")
* Note: q-values for multiple hypothesis testing calculated in 02_appendixtables.do and manually added to table









*** Table 3: Location Preferences: Experimental Estimates

eststo clear

global controls "rel_res rel_qol lnpop reldens relblack reldemocrat relurban"

* Instruments: shown value minus counterfactual value
* (skipped silently if the variables already exist)
cap: gen relpx_diff = relpx_shown - relpx_cfact
cap: gen relinc_diff = relinc_shown - relinc_cfact

* ALL
* Copies of the regressors under sample-specific names, so the stored
* pseudo-models of the three samples can sit side by side in the final table
cap: drop RELINC0 RELPX0
gen RELINC0=post_relinc
gen RELPX0=post_relpx

* row1: probit on the baseline ranking
probit baseline_finalrank RELPX0 RELINC0 lndiffwage $controls ///
	if finalrank!=. & lr_relpx!=., robust
eststo row1

* row2: IV probit on the baseline ranking
ivprobit baseline_finalrank (RELPX0 RELINC0 = relinc_diff relpx_diff) ///
	relpx_cfact relinc_cfact lndiffwage $controls ///
	if finalrank!=. & lr_relpx!=., vce(robust)
eststo row2

* row3: IV probit on the final ranking
* NOTE(review): unlike the Non-Single and Single versions further down, this
* call has no lr_relpx!=. restriction - confirm that this is intended.
ivprobit finalrank (RELINC0 RELPX0 = relinc_diff relpx_diff) ///
	relpx_cfact relinc_cfact lndiffwage $controls gap_baseline gap_followup, vce(robust)
eststo row3

esttab, keep(RELINC0 RELPX0) order(RELINC0 RELPX0) se

matrix list r(coefs)
matrix define A=r(coefs)
eststo drop row*
local rnames : rownames A
local models : coleq A
local models : list uniq models

* Transpose: one pseudo-model per coefficient, with entries named row1-row3
local rowidx = 0
foreach coef in RELINC0 RELPX0 {
	local ++rowidx
	local m = 0
	cap: matrix drop b
	cap: matrix drop se
	cap: matrix drop p
	foreach mdl of local models {
		local ++m
		* columns of A holding the 1st, 2nd and 3rd entry of model m
		local c1 = 3*`m' - 2
		local c2 = 3*`m' - 1
		local c3 = 3*`m'
		matrix tmp = A[`rowidx', `c1']
		if tmp[1,1]<. {
			matrix colnames tmp = `mdl'
			matrix b = nullmat(b), tmp
			matrix tmp[1,1] = A[`rowidx', `c2']
			matrix se = nullmat(se), tmp
			matrix tmp[1,1] = A[`rowidx', `c3']
			matrix p = nullmat(p), tmp
		}
	}
	ereturn post b
	quietly estadd matrix se
	quietly estadd matrix p
	eststo `coef'
	* Raw count of observations satisfying the sample condition
	quietly count if finalrank!=. & lr_relpx!=.
	quietly estadd scalar obs=r(N)

}

esttab, se mtitle noobs star(* .1 ** .05 *** .01) stats(obs)

matrix drop A

* Non-Single
* Same three specifications as for the full sample, restricted to single==0
cap: drop RELINC1 RELPX1
gen RELINC1=post_relinc
gen RELPX1 = post_relpx

* row1: probit on the baseline ranking
probit baseline_finalrank RELINC1 RELPX1 lndiffwage $controls ///
	if single==0 & finalrank!=. & lr_relpx!=., robust
eststo row1

* row2: IV probit on the baseline ranking
ivprobit baseline_finalrank (RELINC1 RELPX1 = relinc_diff relpx_diff) ///
	relpx_cfact relinc_cfact lndiffwage $controls ///
	if single==0 & finalrank!=. & lr_relpx!=., vce(robust)
eststo row2

* row3: IV probit on the final ranking
ivprobit finalrank (RELINC1 RELPX1 = relinc_diff relpx_diff) ///
	relpx_cfact relinc_cfact lndiffwage $controls gap_baseline gap_followup ///
	if single==0 & lr_relpx!=., vce(robust)
eststo row3


esttab, keep(RELINC1 RELPX1) order(RELINC1 RELPX1) se

matrix list r(coefs)
matrix define A=r(coefs)
eststo drop row*
local rnames : rownames A
local models : coleq A
local models : list uniq models

* Transpose: one pseudo-model per coefficient, with entries named row1-row3
local rowidx = 0
foreach coef in RELINC1 RELPX1 {
	local ++rowidx
	local m = 0
	cap: matrix drop b
	cap: matrix drop se
	cap: matrix drop p
	foreach mdl of local models {
		local ++m
		* columns of A holding the 1st, 2nd and 3rd entry of model m
		local c1 = 3*`m' - 2
		local c2 = 3*`m' - 1
		local c3 = 3*`m'
		matrix tmp = A[`rowidx', `c1']
		if tmp[1,1]<. {
			matrix colnames tmp = `mdl'
			matrix b = nullmat(b), tmp
			matrix tmp[1,1] = A[`rowidx', `c2']
			matrix se = nullmat(se), tmp
			matrix tmp[1,1] = A[`rowidx', `c3']
			matrix p = nullmat(p), tmp
		}
	}
	ereturn post b
	quietly estadd matrix se
	quietly estadd matrix p
	eststo `coef'
	* Raw count of observations satisfying the sample condition
	quietly count if single==0 & finalrank!=. & lr_relpx!=.
	quietly estadd scalar obs=r(N)

}

esttab, se mtitle noobs star(* .1 ** .05 *** .01)

matrix drop A

* Single
* Same three specifications as for the full sample, restricted to single==1
cap: drop RELINC2 RELPX2
gen RELINC2=post_relinc
gen RELPX2= post_relpx

* row1: probit on the baseline ranking
probit baseline_finalrank RELINC2 RELPX2 lndiffwage $controls ///
	if single==1 & finalrank!=. & lr_relpx!=., robust
eststo row1

* row2: IV probit on the baseline ranking
ivprobit baseline_finalrank (RELINC2 RELPX2 = relinc_diff relpx_diff) ///
	relpx_cfact relinc_cfact lndiffwage $controls ///
	if single==1 & finalrank!=. & lr_relpx!=., vce(robust)
eststo row2

* row3: IV probit on the final ranking
ivprobit finalrank (RELINC2 RELPX2 = relinc_diff relpx_diff) ///
	relpx_cfact relinc_cfact lndiffwage $controls gap_baseline gap_followup ///
	if single==1 & lr_relpx!=., vce(robust)
eststo row3

esttab, keep(RELINC2 RELPX2) order(RELINC2 RELPX2) se

matrix list r(coefs)
matrix define A=r(coefs)
eststo drop row*
local rnames : rownames A
local models : coleq A
local models : list uniq models

* Transpose: one pseudo-model per coefficient, with entries named row1-row3
local rowidx = 0
foreach coef in RELINC2 RELPX2 {
	local ++rowidx
	local m = 0
	cap: matrix drop b
	cap: matrix drop se
	cap: matrix drop p
	foreach mdl of local models {
		local ++m
		* columns of A holding the 1st, 2nd and 3rd entry of model m
		local c1 = 3*`m' - 2
		local c2 = 3*`m' - 1
		local c3 = 3*`m'
		matrix tmp = A[`rowidx', `c1']
		if tmp[1,1]<. {
			matrix colnames tmp = `mdl'
			matrix b = nullmat(b), tmp
			matrix tmp[1,1] = A[`rowidx', `c2']
			matrix se = nullmat(se), tmp
			matrix tmp[1,1] = A[`rowidx', `c3']
			matrix p = nullmat(p), tmp
		}
	}
	ereturn post b
	quietly estadd matrix se
	quietly estadd matrix p
	eststo `coef'
	* Raw count of observations satisfying the sample condition
	quietly count if single==1 & finalrank!=. & lr_relpx!=.
	quietly estadd scalar obs=r(N)
}

esttab, se mtitle noobs star(* .1 ** .05 *** .01)

matrix drop A

* Header rows of Table 3: panel titles, sample labels, column numbers
local hdr_panels " & \multicolumn{3}{c}{Panel A: \$ \beta^{ER} \$ } & \multicolumn{3}{c}{Panel B: \$ \beta^{COL} \$} \\ \cmidrule(lr){2-4} \cmidrule(lr){5-7} "
local hdr_samples " & Non-Single & Single  & All & Non-Single & Single & All \\"
local hdr_numbers " &(1)&(2)&(3)&(4)&(5)&(6) \\ \midrule"

* Columns are ordered Non-Single (1), Single (2), All (0) within each panel
esttab  RELINC1 RELINC2 RELINC0  RELPX1 RELPX2 RELPX0 ///
	using "results/Table3.tex", ///
	se nonotes noconstant label replace booktabs gap compress ///
	b(%9.3f) se(%9.3f) star(* 0.1 ** 0.05 *** 0.01) ///
	nonumbers mlabels(none) eqlabels(none) ///
	coeflabels(row1 "Baseline" row2 "Experimental" row3 "Experimental, Long Term") ///
	posthead("`hdr_panels'" "`hdr_samples'" "`hdr_numbers'")




** Simulation for Falsification (row 4 of table)
* For each of 1,000 draws, one of the two programs of every id is picked at
* random, placebo = 1 when the first one is picked, and the IV probits are
* re-estimated with placebo as the outcome. Coefficients are collected in B:
*   columns 1-2: RELINC, RELPX, full sample
*   columns 3-4: RELINC, RELPX, single==0
*   columns 5-6: RELINC, RELPX, single==1
use "Main Experiment/mainexperiment_confidential.dta", clear

set matsize 11000

global controls "rel_res rel_qol lnpop reldens relblack reldemocrat relurban"

cap: gen relpx_diff = relpx_shown - relpx_cfact 
cap: gen relinc_diff = relinc_shown - relinc_cfact

set seed 123

* B starts out as all-missing, so a draw whose estimation fails stays missing
matrix define B = J(1000,6,.)

* Number of placebo regressions that ended with an error
local nfail = 0

forvalues i=1/1000 {

	* Random pick of program 1 or 2 for every id (the data operations and
	* random draws are kept exactly as they were, so results are unchanged)
	preserve
	gen b1=substr(prog1,1,1)
	gen b2=substr(prog2,1,1)
	keep id b1 b2

	reshape long b, i(id) j(j)

	sort id
	gen rnd=uniform()

	sort id rnd

	drop rnd
	bys id: keep if _n==1

	rename j firstprog
	keep id firstprog
	tempfile aux
	save "`aux'", replace
	restore

	merge 1:1 id using "`aux'", nogen

	gen placebo=firstprog==1

	* ALL
	cap: drop RELINC RELPX
	gen RELINC=post_relinc
	gen RELPX=post_relpx

	* After a captured failure the previous model is still the active one, so
	* the coefficients are only read when the return code is zero. Reading
	* them unconditionally would record the estimates of an earlier regression.
	cap: ivprobit placebo (RELPX RELINC = relinc_diff relpx_diff) relpx_cfact relinc_cfact lndiffwage $controls if finalrank!=. & lr_relpx!=., vce(robust)
	if _rc==0 {
		matrix B[`i',1]=_b[RELINC]
		matrix B[`i',2]=_b[RELPX]
	}
	else {
		local ++nfail
	}

	* Non-Single
	cap: ivprobit placebo (RELINC RELPX = relinc_diff relpx_diff) relpx_cfact relinc_cfact lndiffwage $controls if single==0 & finalrank!=. & lr_relpx!=., vce(robust)
	if _rc==0 {
		matrix B[`i',3]=_b[RELINC]
		matrix B[`i',4]=_b[RELPX]
	}
	else {
		local ++nfail
	}

	* Single
	cap: ivprobit placebo (RELINC RELPX = relinc_diff relpx_diff) relpx_cfact relinc_cfact lndiffwage $controls if single==1 & finalrank!=. & lr_relpx!=., vce(robust)
	if _rc==0 {
		matrix B[`i',5]=_b[RELINC]
		matrix B[`i',6]=_b[RELPX]
	}
	else {
		local ++nfail
	}

	drop placebo firstprog

}

display as text "Placebo regressions that failed and were left missing: `nfail' of 3000"

clear

svmat B
save results/simulation_output, replace

sum  B3 B5 B1  B4 B6 B2
* Point estimates and SE added manually to .tex table

